library(sets)
library(reshape2)
library(tidyverse)
library(stats)
library(psych)
library(readr)
library(knitr)
library(tibble)
library(readr)
library(outliers)
library(corrplot)
library(magrittr)
library(qwraps2)
library(arsenal)
library(naniar)
library(boot)
library(lme4)
library(lmerTest)
library(psych)
library(doBy)
library(car)
library(DescTools)
library(sjstats)
library(ggrepel)
library(dendextend)
max.theme <- theme_bw() + theme(axis.line = element_line(colour = "black"), panel.grid.minor = element_blank(), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.title= element_blank())
Profanity <- read_csv("ProfanityClean.csv")
Profanity
The Data
This data is a corpus of high-frequency English words (N=1204) generated from the SUBTLEX word frequency database (Brysbaert & New, 2009), Janschewitz (2008), and socially stigmatized words generated by Concepts and Cognition lab members. Variables for each word are a combination of database ratings [e.g., concreteness from Brysbaert et al. (2013)], and participant ratings from Amazon Mechanical Turk.
Specifically, we enlisted 190 participants from Mechanical Turk (Amazon Inc.) to provide subjective ratings of tabooness and concreteness for subsets of the corpus (i.e., each participant rated approximately 500 words), including only experienced raters with Master-Level status. Sex distribution was roughly comparable (93F/97M), with a mean age of 38.56 years (SD = 10.05, range = 22–66).
In short, this is a database of more than 1200 words that include a range of words including profanity and non-profanity. The goal in formulating this dataset was to explore the form and meaning of taboo language in English.
Removing Specific Columns
#Keep Columns by Name (and drop all other columns)
LessProfanity1 <- subset(Profanity [c("Word","Taboo","Arousal","Valence")])
LessProfanity1
#Drop Columns by Number
LessProfanity2 <- subset(Profanity [-c(16:24)])
LessProfanity2
Removing Specific Rows
#Drop Row by Name
LessProfanity3<-subset(Profanity, Word!="balls" & Word!="pecker")
LessProfanity3
#Drop Rows by Certain Value
LessProfanity4<-subset(Profanity, Nlett < 8)
LessProfanity4
Summarizing Data with Piping (%>%)
SummaryStats1 <-Profanity %>%
group_by(Male)%>%
summarize(mean=mean(Arousal, na.rm=TRUE),
sd=sd(Arousal, na.rm=TRUE))
SummaryStats1
ggplot is system in R from the Tidyverse Package. This is probably the most common package for creating plots in R (as you will see with the following plot visualizations.)
ggplot is commonly used for:
Bar plots
Line graphs
Dot plots
Violin Plots
and Much More
Hightaboo<-subset(Profanity, Taboo > 4)
Hightaboo
#Make the Simple Plot
ggplot(Hightaboo, aes(Arousal, Taboo)) + geom_point(size = 3)
#Make the Simple Plot More Descriptive
Hightaboo$BodyActs <-as.factor(Hightaboo$BodyActs) #Make Variables a factor (Body Acts) so it can be coded as needed
ggplot(Hightaboo, aes(Arousal, Taboo, color=BodyActs)) + geom_point(size = 3) +
ylab("Tabooness") + xlab("Arousal") + scale_color_manual(values = c("black",
"green"))
#Make a Plot with Assigned Labels, Legend, and White Background
ggplot(Hightaboo, aes(Arousal, Taboo, color=BodyActs)) + geom_point(size = 3) +
ylab("Tabooness") + xlab("Arousal") + scale_color_manual(values = c("black",
"green")) + geom_text_repel(aes(label = Word), color = "black", size = 3) + theme(legend.position = c(.93,0.60)) + theme(legend.background = element_rect(fill = "white", color = "black")) +
theme(axis.line = element_line(colour = "black"), panel.border = element_blank(),
panel.background = element_blank()) + theme(panel.grid.minor = element_line(colour = "gray",
size = 0.1))
Profanity$Noun<-as.factor(Profanity$Noun)
#Make a Simple Plot
ggplot(Profanity, aes(Noun, Taboo, fill=Noun)) + geom_point(shape = 21)
#Make it Nicer
newvec <- c("black", "red")
ggplot(Profanity, aes(Noun, Taboo, shape = Noun, fill = Noun)) + geom_point(shape = 21,
color="green", size = 2.3, alpha = 0.6, position = position_jitter(w = 0.03,
h = 0)) + scale_fill_manual(values = newvec) + ylab("Tabooness") +
ylim(c(0, 10)) + stat_summary(fun.y = mean, fun.ymin = mean, fun.ymax = mean,
geom = "crossbar", color = "black", size = 0.4, width = 0.3) + max.theme
If we want to compare groups on a particular variable (e.g., Treatment vs. Control, Pre Vs. Post), we need a column that specifies which group the observations belongs in. Many times the dataset is already organized in this way. For example, our previous plots contrasted ‘Noun’ or ‘Not a Noun’ on Tabooness. For these plots, the category membership was pulled from the column ‘Noun’.
Sometimes we need to create a new column that specifies group membership. In our Profanity dataset, we don’t have a column that specifies which semantic category the word belongs to. Let’s make it:
#Create New Variable for Plot
taboocat <- Profanity %>%
mutate(SemanticCategory = ifelse(BodyActs== 1, "BodyActs",
ifelse(BodyPart==1, "BodyPart",
ifelse(Religious==1, "Religious",
ifelse(Disease==1, "Disease",
ifelse(SES==1, "SES",
ifelse(Mental==1, "Mental",NA)))))))
taboocat
Make a Bar Plot
ggplot(taboocat, aes(x = SemanticCategory, y = Taboo, fill = SemanticCategory)) + geom_bar(position = "dodge",
stat = "summary", fun.y = "mean") + stat_summary(fun.data = mean_se,
geom = "errorbar", width = 0.2, position = position_dodge(0.9))
Make a Violin Plot
newvec2 <-c("red", "orange", "green", "yellow", "blue", "purple", "white")
ggplot(taboocat, aes(x = SemanticCategory, y = Taboo, fill = SemanticCategory)) + geom_violin(color="blue", size=0.3) + scale_fill_manual(values = newvec2)+scale_y_continuous(limits = c(1, 7))+ ylab("Tabooness") + xlab("Semantic Category") + stat_summary(fun.y = mean,
fun.ymin = mean, fun.ymax = mean, geom = "crossbar", color = "black", size = 0.4,
width = 0.3)+max.theme
Another reason we might create a new variable for a plot is when we want to create a categorial variable using a continuous variable. Currently, we have Tabooness ratings on a Likert scale from 1-7. If want to contrast ‘High Tabooness’ vs. ‘Low Tabooness’ then we need to make a new variable (i.e., column) which specifies the membership of observations.
ScaledTaboo <- Profanity #new df for scaled score
ScaledTaboo$TabooZ <- scale(ScaledTaboo$Taboo) #create new column that is Z-score of Taboo
#Create Column for High and Low Taboo
HighLowTaboo <- ScaledTaboo %>%
mutate(ZCondition = ifelse(TabooZ > 2, "High",
ifelse(TabooZ < -0.548, "Low",NA)))
HighLowTaboo
Dendogram of High Taboo Words
raw_2 <- HighLowTaboo %>% select(1:4, 7:8,16:24,26) #isolate only the columns of interest- Words, High/Low Tabooness
hp <- raw_2 %>% filter(ZCondition == "High") %>% select(1:15) #filters rows only corresponding to one condition 'High' Tabooness
hp<-hp[1:50,]
hp2 <- hp[, -1] #create a new dataframe minus the first column (eliminate what will be rownames)
rownames(hp2) <- hp$Word #coerce the first column (Word) into rownames of the dataframe
hp.dist <- dist(hp2) #to distance matrix, runs hierarchical cluster analysis using complete linkage
hp.clust <- hclust(hp.dist, method = "ward.D")
hp.dend <- as.dendrogram(hp.clust)
plt.v1 <- hp.dend %>% set("labels_cex", 0.5) %>% set("branches_lwd", 1) %>% set("leaves_cex",
1) %>% set("branches_k_color", k = 3) %>% raise.dendrogram(2)
plot(plt.v1, horiz = T)
Dendogram of Low Taboo Words
raw_2 <- HighLowTaboo %>% select(1:4, 7:8,16:24,26) #isolate only the columns of interest- Words, High/Low Tabooness
lp <- raw_2 %>% filter(ZCondition == "Low") %>% select(1:15) #filters rows only corresponding to one condition 'Low'Tabooness
lp<-lp[1:50,]
lp2 <- lp[, -1] #create a new dataframe minus the first column (eliminate what will be rownames)
rownames(lp2) <- lp$Word #coerce the first column (Word) into rownames of the dataframe
lp.dist <- dist(lp2) #to distance matrix, runs hierarchical cluster analysis using complete linkage
lp.clust <- hclust(lp.dist, method = "ward.D")
lp.dend <- as.dendrogram(lp.clust)
plt.v1 <- lp.dend %>% set("labels_cex", 0.5) %>% set("branches_lwd", 1) %>% set("leaves_cex",
1) %>% set("branches_k_color", k = 3) %>% raise.dendrogram(2)
plot(plt.v1, horiz = T)
###Prep Data
CorrPrep <-Profanity [c(2:10)]
####Run Correlations
CorrPlot <-cor(CorrPrep, method = "pearson")
sig <- cor.mtest(CorrPrep, conf.level = .95) ##run correlations for each variable
###Make the Correlation Plot
colpal <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")) #Option 1: Red to Blue
cont.plot<- corrplot(CorrPlot, method="color", col=colpal(100),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = sig$p, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE )
colpal2 <-colorRampPalette(c("#4477AA","#77AADD","#FFFFFF","#EE9988", "#BB4444")) #Option 2: Blue to Red
cont.plot2<- corrplot(CorrPlot, method="color", col=colpal2(100),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = sig$p, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE )
Make Individual Correlation and Plot in Grid
library(gridExtra)
#Creat Each Correlation Plot
plot1<- ggplot(CorrPrep, aes(x=CorrPrep$Freq, y=CorrPrep$Taboo)) +geom_smooth(method=lm, se=FALSE,color="grey")+
geom_point(size=2, color="steelblue3")+max.theme+labs(x="Freq", y = "Taboo")
plot2 <-ggplot(CorrPrep, aes(x=CorrPrep$Polysemy, y=CorrPrep$Taboo)) +geom_smooth(method=lm, se=FALSE,color="grey")+
geom_point(size=2, color="steelblue3")+max.theme+labs(x="Polysemy", y = "Taboo")
plot3<-ggplot(CorrPrep, aes(x=CorrPrep$CNC, y=CorrPrep$Taboo)) +geom_smooth(method=lm, se=FALSE,color="grey")+
geom_point(size=2, color="steelblue3")+max.theme+labs(x="CNC", y = "Taboo")
plot4<-ggplot(CorrPrep, aes(x=CorrPrep$Arousal, y=CorrPrep$Taboo)) +geom_smooth(method=lm, se=FALSE,color="grey")+
geom_point(size=2, color="steelblue3")+max.theme+labs(x="Arousal", y = "taboo")
#Put them all in one grid
grid.arrange(plot1, plot2,plot3,plot4, nrow = 3)